clear
//Load Data for bacon analysis




if("`c(os)'"=="Windows"){
	local dataFile = "CleanedData\FHWACleanV4.csv"
}
else{
	local dataFile = "CleanedData/FHWACleanV4.csv"
}

import delimited using "`dataFile'"

//Add State Codes to all states
sort state
egen tempStateCode = mode(statecode), by(state)
replace statecode = tempStateCode
drop tempStateCode

//Label Variables
label var logregistrations "Log Total Registrations"
label var logvmt "Log VMT"
label var loglicenseddrivers "Log Licensed Drivers" 
label var logjointpopulation "Log Total Population" 
label var logrealmeangasprice "Log Mean Gas Price" 
label var logemployment  "Log Employment"
label var logrealjointincome  "Log Total Income"
label var logmetropop  "Log (1 + Metro Population)"
label var lognonmetropop  "Log (1 + Non-Metro Population)"
label var logmetrorealinc  "Log (1 + Metro Income)"
label var lognonmetrorealinc  "Log (1 + Non-Metro Income)"
label var logrealstategdp  "Log State GDP"
label var logpopulation  "Log Population"
label var logrealtotalincome  "Log Total Income"
label var logroadmileage "Log Road Mileage"
label var nosafetyind "Treatment"


//Create Inverse Hyperbolic Sine Variables 
gen asinhMetroInc = asinh(realmetroincome2018m)
gen asinhNonMetroInc = asinh(realnonmetroincome2018m)

gen asinhMetroPop = asinh(metropopulation)
gen asinhNonMetroPop = asinh(nonmetropopulation)

//Per capita model
//construct outcome variable
gen logRegPerCapita = logregistrations-log(metropopulation + nonmetropopulation)
gen logVMTPerCapita = logvmt-log(metropopulation + nonmetropopulation)

//Label Asinh variables
label var asinhMetroInc "Asinh(Metro Income)"
label var asinhNonMetroInc "Asinh(Non-Metro Income)"

label var asinhMetroPop "Asinh(Metro Population)"
label var asinhNonMetroPop "Asinh(Non-Metro Population)"

//Label treatment variables

label define nosafetyind 0 "No Treatment" 1 "Treatment"
label values nosafetyind nosafetyind


//Generate State Dummies
egen stateGroup = group(state)

//We don't drop these data to ensure a balanced panel
//Drop Datapoints that are replications of past data for registrations
// drop if statecode=="CO" & year==2006
// drop if statecode=="IN" & (year==2006 | year==2007 | year==2009)
// drop if statecode=="MT" & year==2005
// drop if statecode=="NJ" & year==2008
// drop if statecode=="PR" & (year==2002 | year==2003 | year==2005 | year==2008 | year==2009 | year==2010)
// drop if statecode=="TX" & year==2009
// drop if statecode=="IL" & year==2011
// drop if statecode=="NH" & year==2012
// drop if statecode=="NY" & year==2012
//Drop Datapoints that are replications of past data for VMT
// drop if statecode=="MO" & year == 2003
// drop if statecode=="IN" & (year == 2004 | year==2009)
// drop if statecode=="NV" & year == 2004
// drop if statecode=="NH" & year == 2004
// drop if statecode=="NY" & year == 2005
// drop if statecode=="AZ" & year == 2009
// drop if statecode=="WY" & year == 2010
// drop if statecode=="PR" & (year == 2011 | year == 2012 | year == 2013 | year == 2015 | year == 2016)

//Drop if data is prior to 1970
drop if year<1970
//Drop year past 2017 to remove any missing data
drop if year>2017

//Normalize differenece of outcome variables for identification of extreme points

sort stateGroup year
xtset stateGroup year

gen DiffLogReg = d.logregistrations
gen DiffLogVMT = d.logvmt
gen DiffLogRegPerCapita = d.logRegPerCapita
gen DiffLogVMTPerCapita = d.logVMTPerCapita

local normVarList = "DiffLogReg DiffLogVMT DiffLogRegPerCapita DiffLogVMTPerCapita"

sort stateGroup

foreach varToNorm in `normVarList'{
	
	by stateGroup: egen tempMean = mean(`varToNorm')
	by stateGroup: egen tempSD = sd(`varToNorm')
	by stateGroup: egen tempNumData = count(`varToNorm')
	gen tempSE = tempSD/sqrt(tempNumData)
	
	gen norm`varToNorm' = (`varToNorm'-tempMean)/tempSD
	
	drop tempMean tempSD tempNumData tempSE
	
	di "`varToNorm'"
	
}

//Generate Colorado Dummy
gen coDum1 = 0
gen coDum2 = 0
replace coDum1 = 1 if statecode=="CO" & year>=2002 & year<=2009
replace coDum2 = 1 if statecode=="CO" & year>=2010


//Generate dummy variable for data footnotes
gen transactionDataDummy = 0

replace transactionDataDummy = 1 if statecode=="AR" & year>=2011
replace transactionDataDummy = 2 if statecode=="GA" & year>=2011
replace transactionDataDummy = 3 if statecode=="IA" & year>=2011
replace transactionDataDummy = 4 if statecode=="IL" & year>=2012
replace transactionDataDummy = 5 if statecode=="KY" & year>=2011
replace transactionDataDummy = 6 if statecode=="LA" & year>=2012
replace transactionDataDummy = 7 if statecode=="ME" & year>=2011 //Maine is a state with what looks to not have a consistent change
replace transactionDataDummy = 8 if statecode=="ME" & year>2014
replace transactionDataDummy = 9 if statecode=="MI" & year>=2011 //Michigan is a state with what looks to not have a consistent change
replace transactionDataDummy = 10 if statecode=="MI" & year>2012
replace transactionDataDummy = 11 if statecode=="MN" & year>=2011
replace transactionDataDummy = 12 if statecode=="NV" & year>=2011
replace transactionDataDummy = 13 if statecode=="OK" & year>=2011
replace transactionDataDummy = 14 if statecode=="SD" & year>=2011
replace transactionDataDummy = 15 if statecode=="TN" & year>=2011
replace transactionDataDummy = 16 if statecode=="TX" & year>=2011
replace transactionDataDummy = 17 if statecode=="WA" & year>=2011
replace transactionDataDummy = 18 if statecode=="WI" & year>=2011
replace transactionDataDummy = 19 if statecode=="WY" & year>=2011




//Local Data Source Controls

sort state year

python

import numpy as np
import pandas as p 
from sfi import Data 


def getDSCDummies(data, col, cutoff):

	tempExtState = None
	
	tempIndex = 0
	stateCol = 0
	
	rows, cols = data.shape
	
	dummyExt = []
	
	for i in range(rows):
		tempDiff = float(data[i,col])
		tempState = data[i,stateCol]
		
		
		if(abs(tempDiff)>cutoff):
			tempExtState = tempState
			tempIndex+=1
			
		if tempState==tempExtState:
			dummyExt.append(tempIndex)
		else:
			dummyExt.append(0)
		
	
	dummyExt = np.array(dummyExt).reshape((-1,1))
	return dummyExt

dataCols = ['state', 'year', 'normDiffLogReg', 'normDiffLogVMT', 'normDiffLogRegPerCapita', 'normDiffLogVMTPerCapita']

data = np.array(Data.get(dataCols, missingval=np.nan))


regCutoffLow = 2.93 #Spec. 4
regCutoffPrimary = 4.02 #Spec. 3
regCutoffHigh = 7 #Spec. 2

vmtCutoffLow = 2.93 #Spec. 4 
vmtCutoffPrimary = 4.02 #Spec. 3
vmtCutoffHigh = 7 #Spec. 2

regCol = dataCols.index('normDiffLogReg')
vmtCol = dataCols.index('normDiffLogVMT')

regPerCapitaCol = dataCols.index('normDiffLogRegPerCapita')
vmtPerCapitaCol = dataCols.index('normDiffLogVMTPerCapita')

rows, cols = data.shape

regDummyExtLow = getDSCDummies(data, regCol, regCutoffLow)
regDummyExtPrimary = getDSCDummies(data, regCol, regCutoffPrimary)
regDummyExtHigh = getDSCDummies(data, regCol, regCutoffHigh)

vmtDummyExtLow = getDSCDummies(data, vmtCol, vmtCutoffLow)
vmtDummyExtPrimary = getDSCDummies(data, vmtCol, vmtCutoffPrimary)
vmtDummyExtHigh = getDSCDummies(data, vmtCol, vmtCutoffHigh)

regPerCapitaDummyExtLow = getDSCDummies(data, regPerCapitaCol, regCutoffLow)
regPerCapitaDummyExtPrimary = getDSCDummies(data, regPerCapitaCol, regCutoffPrimary)
regPerCapitaDummyExtHigh = getDSCDummies(data, regPerCapitaCol, regCutoffHigh)

vmtPerCapitaDummyExtLow = getDSCDummies(data, vmtPerCapitaCol, vmtCutoffLow)
vmtPerCapitaDummyExtPrimary = getDSCDummies(data, vmtPerCapitaCol, vmtCutoffPrimary)
vmtPerCapitaDummyExtHigh = getDSCDummies(data, vmtPerCapitaCol, vmtCutoffHigh)

Data.addVarInt('regDummyExtSpec4')
Data.store("regDummyExtSpec4",None, regDummyExtLow[:])
Data.addVarInt('regDummyExtSpec3')
Data.store("regDummyExtSpec3",None, regDummyExtPrimary[:])
Data.addVarInt('regDummyExtSpec2')
Data.store("regDummyExtSpec2",None, regDummyExtHigh[:])

Data.addVarInt('vmtDummyExtSpec4')
Data.store("vmtDummyExtSpec4",None, vmtDummyExtLow[:])
Data.addVarInt('vmtDummyExtSpec3')
Data.store("vmtDummyExtSpec3",None, vmtDummyExtPrimary[:])
Data.addVarInt('vmtDummyExtSpec2')
Data.store("vmtDummyExtSpec2",None, vmtDummyExtHigh[:])


Data.addVarInt('regPerCapitaDummyExtSpec4')
Data.store("regPerCapitaDummyExtSpec4",None, regPerCapitaDummyExtLow[:])
Data.addVarInt('regPerCapitaDummyExtSpec3')
Data.store("regPerCapitaDummyExtSpec3",None, regPerCapitaDummyExtPrimary[:])
Data.addVarInt('regPerCapitaDummyExtSpec2')
Data.store("regPerCapitaDummyExtSpec2",None, regPerCapitaDummyExtHigh[:])

Data.addVarInt('vmtPerCapitaDummyExtSpec4')
Data.store("vmtPerCapitaDummyExtSpec4",None, vmtPerCapitaDummyExtLow[:])
Data.addVarInt('vmtPerCapitaDummyExtSpec3')
Data.store("vmtPerCapitaDummyExtSpec3",None, vmtPerCapitaDummyExtPrimary[:])
Data.addVarInt('vmtPerCapitaDummyExtSpec2')
Data.store("vmtPerCapitaDummyExtSpec2",None, vmtPerCapitaDummyExtHigh[:])
	
end


//Generate normalized year values to reduce differences in magnitude of covariates.
//Scale year for better numerical performance
local yearScale = 1
gen normYear = year/`yearScale'
gen quadYear = (year/1)^2

//Establish treatment interacted with pre-treatment population share in metro areas
frame copy default MetroShares
frame change MetroShares
keep if year==1970

gen preTreatMetroShare = metropopulation/(metropopulation+nonmetropopulation)

frame change default 

frlink m:1 state, frame(MetroShares)
frget preTreatMetroShare, from(MetroShares)

gen metroIntTreatment = nosafetyind*preTreatMetroShare

frame drop MetroShares
